I] Arrange Data

# Check how the Species column is stored (the output below reports a factor).
class(iris$Species)
[1] "factor"
# Show the first five Species values together with the factor's levels.
iris$Species[1:5]
[1] setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
# Flag, row by row, whether a row repeats an earlier row of iris.
duplicated(iris)
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [19] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [55] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[127] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
[145] FALSE FALSE FALSE FALSE FALSE FALSE
# Cleaning Data
# Mutate
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Add the petal width-to-length ratio as a new column named aspect_ratio.
mutate(iris, aspect_ratio = Petal.Width / Petal.Length)

# Subset: keep only the flowers whose sepal is longer than 7.
subset(
  iris,
  Sepal.Length > 7
)

# Aggregate: mean sepal length per species, number of flowers per species,
# and number of flowers recorded at each distinct sepal length.
aggregate(
  Sepal.Length ~ Species,
  data = iris,
  FUN = "mean"
)
aggregate(
  Sepal.Length ~ Species,
  data = iris,
  FUN = "length"
)
aggregate(
  Species ~ Sepal.Length,
  data = iris,
  FUN = "length"
)

# Group by & Summarize
library(DT)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
# Per-species mean and standard deviation of each of the four measurements.
new_iris <- iris %>%
  group_by(Species) %>%
  summarise(
    Avg_Sepal_Length = mean(Sepal.Length),
    SD_Sepal_Length = sd(Sepal.Length),
    Avg_Sepal_Width = mean(Sepal.Width),
    SD_Sepal_Width = sd(Sepal.Width),
    Avg_Petal_Length = mean(Petal.Length),
    SD_Petal_Length = sd(Petal.Length),
    Avg_Petal_Width = mean(Petal.Width),
    SD_Petal_Width = sd(Petal.Width)
  )

# Show the summary as an interactive table. Every statistic column (all
# columns except the leading Species key) is rounded to two decimals so the
# sepal and petal figures are displayed consistently.
datatable(new_iris, caption = "Mean & Standard Deviation") %>%
  formatRound(columns = names(new_iris)[-1], digits = 2)

II] Arrange Table

library(ggplot2)
library(plotly)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
# Preview the first ten observations, then tally the rows per species.
head(iris, n = 10)
iris %>% count(Species)
# Scatter Plot
# Petal length against sepal length; colour and point shape are keyed to
# species, and a linear-model smoother is layered over the points.
p1 <- ggplot(
  data = iris,
  mapping = aes(
    x = Sepal.Length,
    y = Petal.Length,
    colour = Species,
    shape = Species
  )
) +
  geom_point(size = 3) +
  ggtitle("Petal Length vs Sepal Length") +
  geom_smooth(method = "lm")
ggplotly(p1)
`geom_smooth()` using formula 'y ~ x'
# Generalized Pair Plot
# Pairwise panels for the four measurement columns, coloured by species.
library(GGally)
p2 <- ggpairs(
  data = iris,
  mapping = aes(colour = Species),
  columns = 1:4
) +
  ggtitle("Anderson's Iris Dataset --- 3 Species")
p2

 plot: [1,1] [=>------------------------------------]  6% est: 0s 
 plot: [1,2] [====>---------------------------------] 12% est: 3s 
 plot: [1,3] [======>-------------------------------] 19% est: 2s 
 plot: [1,4] [=========>----------------------------] 25% est: 2s 
 plot: [2,1] [===========>--------------------------] 31% est: 1s 
 plot: [2,2] [=============>------------------------] 38% est: 1s 
 plot: [2,3] [================>---------------------] 44% est: 1s 
 plot: [2,4] [==================>-------------------] 50% est: 1s 
 plot: [3,1] [====================>-----------------] 56% est: 1s 
 plot: [3,2] [=======================>--------------] 62% est: 1s 
 plot: [3,3] [=========================>------------] 69% est: 1s 
 plot: [3,4] [===========================>----------] 75% est: 0s 
 plot: [4,1] [==============================>-------] 81% est: 0s 
 plot: [4,2] [================================>-----] 88% est: 0s 
 plot: [4,3] [===================================>--] 94% est: 0s 
 plot: [4,4] [======================================]100% est: 0s 
                                                                  

# Base-graphics scatterplot matrix of the four measurements (upper triangle
# only), with point fill chosen by species. pairs() draws as a side effect
# and returns NULL, so it is called directly instead of being stored and
# printed.
species_fill <- c("red", "green3", "blue")[as.integer(iris$Species)]
pairs(
  iris[1:4],
  lower.panel = NULL,
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  bg = species_fill
)
# Parallel Coordinate Plot
# One line per flower across the four measurement columns, grouped by species.
p4 <- ggparcoord(
  data = iris,
  columns = 1:4,
  groupColumn = "Species"
)
ggplotly(p4)

III] Other Plots

# Histogram
# Sepal length in 0.2-wide bins, filled by species, with a dashed grey
# reference line at the overall mean sepal length.
p5 <- ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(aes(fill = Species), binwidth = 0.2, color = "black") +
  xlab("Sepal Length") +
  ylab("Frequency") +
  ggtitle("Histogram of Sepal Length") +
  # The mean is a single constant, so it is passed as a layer parameter;
  # mapping it through aes() would draw the same line once per row of iris.
  geom_vline(
    xintercept = mean(iris$Sepal.Length),
    linetype = "dashed",
    color = "grey"
  )
ggplotly(p5)
# Density Plot
# Sepal width density per species (semi-transparent fills), with a dashed
# grey reference line at the overall mean sepal width.
p6 <- ggplot(iris, aes(x = Sepal.Width, colour = Species, fill = Species)) +
  geom_density(alpha = .5) +
  # The mean is a single constant, so it is passed as a layer parameter;
  # mapping it through aes() would draw the same line once per row of iris
  # and make the layer inherit the species colour/fill mapping.
  geom_vline(
    xintercept = mean(iris$Sepal.Width),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Sepal Width") +
  ylab("Density")
ggplotly(p6)
# Box Plot
# Petal length per species, with y-axis breaks requested at multiples of 0.5.
p7 <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Length, fill = Species)
) +
  geom_boxplot() +
  scale_y_continuous(name = "Petal Length", breaks = seq(0, 10, by = 0.5)) +
  labs(title = "Iris Petal Length Box Plot", x = "Species")
ggplotly(p7)
# Violin Plot
# Petal width per species as violins whose outline and fill follow species,
# with y-axis breaks requested at multiples of 0.5.
p8 <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Width, fill = Species)
) +
  geom_violin(aes(colour = Species), alpha = 0.75) +
  scale_y_continuous(name = "Petal Width", breaks = seq(0, 10, by = 0.5)) +
  labs(title = "Iris Petal Width Violin Plot", x = "Species")
ggplotly(p8)
LS0tDQp0aXRsZTogImlyaXMgRGF0YXNldCB3aXRoIGludGVyYWN0aXZpdHkiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KSV0gQXJyYW5nZSBEYXRhDQpgYGB7cn0NCmNsYXNzKGlyaXMkU3BlY2llcykNCmlyaXMkU3BlY2llc1sxOjVdDQpkdXBsaWNhdGVkKGlyaXMpDQpgYGANCmBgYHtyfQ0KIyBDbGVhbmluZyBEYXRhDQojIE11dGF0ZQ0KbGlicmFyeShkcGx5cikNCm11dGF0ZShpcmlzLCBhc3BlY3RfcmF0aW9uID0gUGV0YWwuV2lkdGgvUGV0YWwuTGVuZ3RoKQ0KDQojIFN1YnNldA0Kc3Vic2V0KGlyaXMsIFNlcGFsLkxlbmd0aCA+IDcpDQoNCiMgQWdncmVnYXRlDQphZ2dyZWdhdGUoU2VwYWwuTGVuZ3RoIH4gU3BlY2llcywgZGF0YSA9IGlyaXMsIEZVTiA9ICdtZWFuJykNCmFnZ3JlZ2F0ZShTZXBhbC5MZW5ndGggfiBTcGVjaWVzLCBkYXRhID0gaXJpcywgRlVOID0gJ2xlbmd0aCcpDQphZ2dyZWdhdGUoU3BlY2llcyB+IFNlcGFsLkxlbmd0aCwgZGF0YSA9IGlyaXMsIEZVTiA9ICdsZW5ndGgnKQ0KDQojIEdyb3VwIGJ5ICYgU3VtbWFyaXplDQpsaWJyYXJ5KERUKQ0KbmV3X2lyaXMgPC0gaXJpcyAlPiUgZ3JvdXBfYnkoU3BlY2llcykgJT4lIHN1bW1hcmlzZShBdmdfU2VwYWxfTGVuZ3RoID0gbWVhbihTZXBhbC5MZW5ndGgpLCBTRF9TZXBhbF9MZW5ndGggPSBzZChTZXBhbC5MZW5ndGgpLCBBdmdfU2VwYWxfV2lkdGggPSBtZWFuKFNlcGFsLldpZHRoKSwgU0RfU2VwYWxfV2lkdGggPSBzZChTZXBhbC5XaWR0aCksIEF2Z19QZXRhbF9MZW5ndGggPSBtZWFuKFBldGFsLkxlbmd0aCksIFNEX1BldGFsX0xlbmd0aCA9IHNkKFBldGFsLkxlbmd0aCksIEF2Z19QZXRhbF9XaWR0aCA9IG1lYW4oUGV0YWwuV2lkdGgpLCBTRF9QZXRhbF9XaWR0aCA9IHNkKFBldGFsLldpZHRoKSkNCmRhdGF0YWJsZShuZXdfaXJpcywgY2FwdGlvbiA9ICJNZWFuICYgU3RhbmRhcmQgRGV2aWF0aW9uIikgJT4lIGZvcm1hdFJvdW5kKDI6NSwgZGlnaXRzID0gMikNCmBgYA0KSUldIEFycmFuZ2UgVGFibGUNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShwbG90bHkpDQpoZWFkKGlyaXMsIDEwKQ0KY291bnQoaXJpcywgU3BlY2llcykNCmBgYA0KYGBge3J9DQojIFNjYXR0ZXIgUGxvdA0KcDEgPC0gZ2dwbG90KGlyaXMsIGFlcyh4ID0gU2VwYWwuTGVuZ3RoLCB5ID0gUGV0YWwuTGVuZ3RoLCBjb2xvciA9IFNwZWNpZXMsIHNoYXBlID0gU3BlY2llcykpICsgZ2VvbV9wb2ludChzaXplID0gMykgKyBnZ3RpdGxlKCdQZXRhbCBMZW5ndGggdnMgU2VwYWwgTGVuZ3RoJykgKyBnZW9tX3Ntb290aChtZXRob2QgPSAnbG0nKQ0KZ2dwbG90bHkocDEpDQpgYGANCmBgYHtyfQ0KIyBHZW5lcmFsaXplZCBQYWlyIFBsb3QNCmxpYnJhcnkoR0dhbGx5KQ0KcDIgPC0gZ2dwYWlycyhpcmlzLCBjb2x1bW5zID0gMTo0LCBhZXMoY29sb3IgPSBTcGVjaWVzKSkgKyBnZ3RpdGxlKCJBbmRlcnNvbidzIElyaXMgRGF0YXNldCAtLS0gMyBTcGVjaWVzIikNCnAy
DQpwMyA8LSBwYWlycyhpcmlzWzE6NF0sIGxvd2VyLnBhbmVsID0gTlVMTCwgbWFpbiA9ICJBbmRlcnNvbidzIElyaXMgRGF0YSAtLSAzIHNwZWNpZXMiLCBwY2ggPSAyMSwgYmcgPSBjKCJyZWQiLCAiZ3JlZW4zIiwgImJsdWUiKVt1bmNsYXNzKGlyaXMkU3BlY2llcyldKQ0KcDMNCmBgYA0KYGBge3J9DQojIFBhcmFsbGVsIENvb3JkaW5hdGUgUGxvdA0KcDQgPC0gZ2dwYXJjb29yZChkYXRhID0gaXJpcywgY29sdW1ucyA9IDE6NCwgZ3JvdXBDb2x1bW4gPSAiU3BlY2llcyIpDQpnZ3Bsb3RseShwNCkNCmBgYA0KSUlJXSBPdGhlciBQbG90cyANCmBgYHtyfQ0KIyBIaXN0b2dyYW0NCnA1IDwtIGdncGxvdChkYXRhID0gaXJpcywgYWVzKHggPSBTZXBhbC5MZW5ndGgpKSArIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMC4yLCBjb2xvcj0iYmxhY2siLCBhZXMoZmlsbCA9IFNwZWNpZXMpKSArIHhsYWIoIlNlcGFsIExlbmd0aCIpICsgeWxhYigiRnJlcXVlbmN5IikgKyBnZ3RpdGxlKCJIaXN0b2dyYW0gb2YgU2VwYWwgTGVuZ3RoIikgKyBnZW9tX3ZsaW5lKGRhdGEgPSBpcmlzLCBhZXMoeGludGVyY2VwdCA9IG1lYW4oU2VwYWwuTGVuZ3RoKSksIGxpbmV0eXBlID0gImRhc2hlZCIsIGNvbG9yPSJncmV5IikNCmdncGxvdGx5KHA1KQ0KYGBgDQpgYGB7cn0NCiMgRGVuc2l0eSBQbG90DQpwNiA8LSBnZ3Bsb3QoaXJpcywgYWVzKHggPSBTZXBhbC5XaWR0aCwgY29sb3VyID0gU3BlY2llcywgZmlsbCA9IFNwZWNpZXMpKSArIGdlb21fZGVuc2l0eShhbHBoYSA9IC41KSArIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQgPSBtZWFuKFNlcGFsLldpZHRoKSksIGxpbmV0eXBlID0gImRhc2hlZCIsIGNvbG9yID0gImdyZXkiLCBzaXplID0gMSkgKyB4bGFiKCJTZXBhbCBXaWR0aCIpICsgeWxhYigiRGVuc2l0eSIpDQpnZ3Bsb3RseShwNikNCmBgYA0KYGBge3J9DQojIEJveCBQbG90DQpwNyA8LSBnZ3Bsb3QoaXJpcywgYWVzKFNwZWNpZXMsIFBldGFsLkxlbmd0aCwgZmlsbCA9IFNwZWNpZXMpKSArIGdlb21fYm94cGxvdCgpICsgc2NhbGVfeV9jb250aW51b3VzKCJQZXRhbCBMZW5ndGgiLCBicmVha3MgPSBzZXEoMCwgMTAsIGJ5ID0gLjUpKSArIGxhYnModGl0bGUgPSAiSXJpcyBQZXRhbCBMZW5ndGggQm94IFBsb3QiLCB4ID0gIlNwZWNpZXMiKQ0KZ2dwbG90bHkocDcpDQpgYGANCmBgYHtyfQ0KIyBWaW9saW4gUGxvdA0KcDggPC0gZ2dwbG90KGlyaXMsIGFlcyhTcGVjaWVzLCBQZXRhbC5XaWR0aCwgZmlsbCA9IFNwZWNpZXMpKSArIHNjYWxlX3lfY29udGludW91cygiUGV0YWwgV2lkdGgiLCBicmVha3MgPSBzZXEoMCwgMTAsIGJ5ID0gLjUpKSArIGxhYnModGl0bGUgPSAiSXJpcyBQZXRhbCBXaWR0aCBWaW9saW4gUGxvdCIsIHggPSAiU3BlY2llcyIpICsgZ2VvbV92aW9saW4oYWVzKGNvbG9yID0gU3BlY2llcyksIGFscGhhID0gLjc1KQ0KZ2dwbG90bHkocDgpDQpgYGA=